# Mount Google Drive so the Colab runtime can reach the dataset files.
from google.colab import drive
drive.mount('/content/gdrive')
import pandas as pd
import numpy as np
import os
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from mpl_toolkits.axes_grid1 import ImageGrid
import seaborn as sns
import re
# Keras image utilities (load_img / img_to_array) used throughout.
from keras.preprocessing import image
def data_information(data_dir, exclude=("Soybean___healthy",)):
    """Summarize the classes found in a PlantVillage-style directory.

    Every sub-directory of *data_dir* is one class, named
    "<Specie>___<Disease>".

    Parameters
    ----------
    data_dir : str
        Directory whose sub-directories are the class folders.
    exclude : iterable of str, optional
        Class-folder names to drop from the summary.  Defaults to the
        original behavior of ignoring "Soybean___healthy".  Unlike the
        previous list.remove() call, a missing folder no longer raises.

    Returns
    -------
    tuple of (set, set, set)
        (class_names, species, diseases).
    """
    class_names = set(os.listdir(data_dir)) - set(exclude)
    species = set()
    diseases = set()
    # One pass extracts both halves of "<Specie>___<Disease>".
    for name in class_names:
        specie, _, disease = name.partition("___")
        species.add(specie)
        diseases.add(disease)
    return class_names, species, diseases
# Location of the training split; every sub-folder is one class.
data_dir = "PlantVillage/train"
class_names, species, disease = data_information(data_dir)

# Report the size of each summary set.
for label, group in (("classes", class_names), ("species", species), ("diseases", disease)):
    print("no of " + label + " are:", len(group))

# Notebook-style display statements (no-ops when run as a script).
class_names
species
disease
# Count the images available per class in both the train and test splits.
image_counts = {}
test_count = []
for name in class_names:
    train_files = os.listdir(data_dir + "/" + name)
    test_files = os.listdir("PlantVillage/test" + "/" + name)
    image_counts[name] = len(train_files)
    test_count.append(len(test_files))

# Assemble a class-indexed table with one column per split.
count = pd.DataFrame.from_dict(image_counts, orient="index")
count.columns = ["train_count"]
count["test_count"] = test_count
count  # notebook-style display

# Side-by-side bar chart of train vs test counts for every class.
ax = count.plot(kind="bar", figsize=(20, 10))
ax.set_xlabel("class_name")
ax.set_ylabel("count")
# Build a Disease x Specie presence matrix: 1 where the combination exists
# as an actual class folder, 0 otherwise.
comb = [
    (dis_name, specie_name, int(specie_name + "___" + dis_name in class_names))
    for dis_name in disease
    for specie_name in species
]
data = pd.DataFrame(comb, columns=["Disease", "Specie", "class"])
data_matrix = data.pivot_table(values="class", index="Disease", columns="Specie")
data_matrix  # notebook-style display
# Collect one (filepath, class) row per training image.
train_rows = []
for class_name in class_names:
    class_dir = "PlantVillage/train" + "/" + class_name
    for file_name in os.listdir(class_dir):
        train_rows.append([class_dir + "/" + file_name, class_name])

data_train = pd.DataFrame(train_rows, columns=['file', 'class_name'])
data_train = data_train.drop_duplicates()
print(len(data_train))
data_train.head()
# Collect one (filepath, class) row per test image.
data_test = []
for class_name in class_names:
    path = "PlantVillage/test" + "/" + class_name
    for file in os.listdir(path):
        data_test.append([path + "/" + file, class_name])
test_data = pd.DataFrame(data_test, columns=['file', 'class_name'])
test_data = test_data.drop_duplicates()
print(len(test_data))
test_data.head()
print("no of training images are:", len(data_train))
# Bug fix: report the de-duplicated DataFrame size (what is actually used
# downstream), not the raw pre-dedup list length.
print("no of testing images are:", len(test_data))
import cv2
import numpy as np
from matplotlib import pyplot as plt
from google.colab.patches import cv2_imshow

# Show one sample training image and its per-channel brightness histogram.
img = cv2.imread(data_train["file"][10], -1)  # -1 == load unchanged
cv2_imshow(img)

for channel, colour in enumerate(('b', 'g', 'r')):
    histogram = cv2.calcHist([img], [channel], None, [256], [0, 256])
    plt.plot(histogram, color=colour)
    plt.xlim([0, 256])
plt.ylabel(' Number of pixels in the image')
plt.xlabel(' Brightness value')
plt.show()
def read_img(filepath, size):
    """Load the image at *filepath* resized to *size* as a float array."""
    loaded = image.load_img(filepath, target_size=size)
    return image.img_to_array(loaded)
def format_name(s):
    """Collapse every run of underscores in *s* into a single space."""
    return re.sub(r"_+", " ", s)
# Display up to 10 sample images per class in one tall grid, with the
# (underscore-free) class name written next to each row.
num_classes = len(class_names)
fig = plt.figure(1, figsize=(10, 40))
grid = ImageGrid(fig, 111, nrows_ncols=(num_classes, 10), axes_pad=0.05)

cell = 0
for class_name in class_names:
    sample_files = data_train[data_train['class_name'] == class_name]['file'].values[:10]
    for filepath in sample_files:
        axis = grid[cell]
        axis.imshow(read_img(filepath, (256, 256)) / 255.)
        axis.axis('off')
        # Label the row on its last (10th) cell.
        if cell % 10 == 9:
            axis.text(260, 112, format_name(class_name), verticalalignment='center')
        cell += 1
plt.show();
from sklearn.model_selection import train_test_split

# Hold out 20% of the training files for validation (fixed seed for
# reproducibility).
train_files, valid_files, train_target, valid_target = train_test_split(
    data_train["file"], data_train["class_name"], test_size=0.2, random_state=42)
train = np.array(train_files)
valid = np.array(valid_files)

# Bug fix: one-hot encode every split against a single, fixed label order.
# Calling pd.get_dummies() independently per split silently mis-aligns the
# columns whenever a class happens to be missing from one split.  With all
# classes present this produces exactly the previous (alphabetical) columns.
label_order = sorted(data_train["class_name"].unique())
target_train = pd.get_dummies(pd.Categorical(train_target, categories=label_order)).values
target_valid = pd.get_dummies(pd.Categorical(valid_target, categories=label_order)).values

test = np.array(test_data["file"])
target_test = pd.get_dummies(pd.Categorical(test_data["class_name"], categories=label_order)).values

print("final training data size is:", train.shape[0])
print("final validation data size is:", valid.shape[0])
print("final testing data size is:", test.shape[0])
from keras.preprocessing import image
from tqdm import tqdm
def path_to_tensor(img_path, size=(64, 64)):
    """Load a single image as a 4-D batch tensor.

    Parameters
    ----------
    img_path : str
        Path to the image file.
    size : tuple of (int, int), optional
        Target (height, width).  Defaults to the 64x64 input the CNN below
        expects; previously this was hard-coded (and the comments wrongly
        claimed 224x224).

    Returns
    -------
    np.ndarray
        Array of shape (1, height, width, 3).
    """
    # loads RGB image as PIL.Image.Image type, resized to *size*
    img = image.load_img(img_path, target_size=size)
    # convert PIL.Image.Image type to a 3D tensor with shape (height, width, 3)
    x = image.img_to_array(img)
    # prepend the batch axis -> (1, height, width, 3)
    return np.expand_dims(x, axis=0)
def paths_to_tensor(img_paths):
    """Stack per-image tensors for *img_paths* into one (N, H, W, 3) array."""
    tensors = [path_to_tensor(p) for p in tqdm(img_paths)]
    return np.vstack(tensors)
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True  # tolerate partially-written JPEGs

# pre-process the data for Keras: stack into 4-D tensors, rescale to [0, 1]
def _scaled_tensors(paths):
    return paths_to_tensor(paths).astype('float32') / 255

train_tensors = _scaled_tensors(train)
valid_tensors = _scaled_tensors(valid)
test_tensors = _scaled_tensors(test)
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential
# From-scratch CNN: four conv/pool stages that halve the spatial size while
# doubling the channel count (64x64x3 -> 4x4x128), then a dense classifier.
model = Sequential()
model.add(Conv2D(filters=16, kernel_size=2, padding="same",
                 activation="relu", input_shape=(64, 64, 3)))
model.add(MaxPooling2D(pool_size=2))
model.add(Conv2D(filters=32, kernel_size=2, padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=2))
model.add(Conv2D(filters=64, kernel_size=2, padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=2))
model.add(Conv2D(filters=128, kernel_size=2, padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=2))
model.add(Flatten())
model.add(Dense(500, activation='relu'))
# Bug fix: size the output layer from the data instead of hard-coding 37,
# so the head always matches the number of class folders actually found.
model.add(Dense(num_classes, activation='softmax'))
model.summary()
model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
              metrics=['accuracy'])
from keras.callbacks import ModelCheckpoint
# Number of passes over the training data.
epochs = 50
# Save only the weights that achieve the best validation accuracy so far.
# NOTE(review): 'val_acc' is the metric name used by older Keras releases;
# newer versions log it as 'val_accuracy'.  If the monitored name does not
# match, no checkpoint is ever saved — confirm against the installed Keras.
checkpointer = ModelCheckpoint(filepath='weights.final.from_scratch.hdf5',
verbose=1, save_best_only=True, monitor='val_acc')
model.fit(train_tensors, target_train,
validation_data=(valid_tensors, target_valid),
epochs=epochs, batch_size=20, callbacks=[checkpointer], verbose=1)
# Restore the best checkpoint before evaluating on the test set.
model.load_weights('weights.final.from_scratch.hdf5')
# Predict the whole test set in one batched call — far faster than the
# previous per-image model.predict() loop, with identical argmax results.
plant_predictions = np.argmax(model.predict(test_tensors), axis=1)
# report test accuracy: % of predictions matching the one-hot label argmax
test_accuracy = 100 * np.sum(plant_predictions == np.argmax(target_test, axis=1)) / len(plant_predictions)
print('Test accuracy: %.4f%%' % test_accuracy)
import cv2

# Bug fix: pd.get_dummies() orders its one-hot columns alphabetically, so
# the argmax index must be mapped through a *sorted* label list.  The old
# list(set) conversion produced an arbitrary order, mislabeling predictions.
class_names = sorted(class_names)

def prediction_scratch(path):
    """Display the image at *path* and print the model's predicted class.

    Parameters
    ----------
    path : str
        Path to any image file readable by OpenCV/Keras.
    """
    tensor = path_to_tensor(path)
    # NOTE(review): cv2 loads BGR, so matplotlib shows swapped colors here;
    # consider cv2.cvtColor(..., cv2.COLOR_BGR2RGB) if true colors matter.
    plt.imshow(cv2.imread(path))
    plt.show()
    # argmax over the softmax output -> column index in the one-hot encoding
    prediction = class_names[np.argmax(model.predict(tensor))]
    print("Predicted class of image is :", prediction)
    # Dead code removed: the old list(test).index(path) lookup was unused
    # and raised ValueError for any image outside the test split.
# Spot-check the scratch model on a few test images.
for sample_index in (5000, 100, 3600):
    prediction_scratch(test[sample_index])